/*The SAS program HEI-2020 Population Ratio Method using NHANES 2017-2018 data and FPED */

/*  NOTE: Analysts should consider whether they want to include or exclude nutritional supplements at the food item level
 and this example does not contain code to remove those items.  */

/*task 1.	Create a "home" folder on your computer and save the FPED data, NHANES data, Demographic data, and the required HEI-2020 macro in it. 
  Specify the path to that folder in the macro variable HOME. Every path below is built from &home. */
%let home = C:\Users\Documents\FPED_NHANES; 
/*In this Example, the "home" folder is in C Drive, within Documents, and is called FPED_NHANES. */


/*task 2.	The libnames below point to the input SAS datasets: library NH is read for the NHANES nutrient and demographic data,
  and library FPED is read for the FPED data. */
libname NH "&home\NH";  
libname FPED "&home\FPED"; 
/*In this Example, the FPED data are in a folder called "FPED", and the NHANES and Demographic data are in a folder called "NH", all saved within the "home" folder. 
  These are SAS datasets. */


/*task 3.	Create a folder in the "home" folder to receive the output file containing the HEI-2020 component and total scores. 
  Specify the full path of the output file. The fileref RES is the destination used by PROC EXPORT at the end of this program. */
filename RES "&home\RES\hei2020r.csv"; 
/*In this Example, the folder is called "RES", within the "home" folder, and the exported results will be a csv file called "hei2020r". */


/*task 4.	Read in the required HEI-2020 scoring macro. The macro file must be saved within the home folder under the name used below. */
%include "&home\hei2020.score.macro.sas";

/*NOTE: Once you have completed all the tasks above, all you need to do is run the SAS program below. Unless you used different names for your datasets and folders, 
  no other action is required from you. */
      
/* First-level title; the TITLE2 statements in the steps below add a second title line. */
title 'HEI-2020 population ratio scores for NHANES 2017-2018 day 1, AGE >= 2, RELIABLE DIETS, Include Pregnant and Lactating Women';

/*Section (I): Calculations at the individual participant level to obtain variables needed to calculate HEI-2020 scores.*/

/*part a: Copies the FPED dataset into the WORK library. */
data FPED;
  set FPED.fped_dr1tot_1718;
run;

/*part b: Reads the NHANES nutrient data, restricted to records with
  DR1DRSTZ=1 (reliable dietary recall status), keeping only the variables used later. */
data NUTRIENT;
  set NH.DR1TOT_J;
  where DR1DRSTZ=1;
  keep SEQN WTDRD1 DR1TKCAL DR1TSFAT DR1TALCO DR1TSODI DR1DRSTZ DR1TMFAT DR1TPFAT;
run;

/*part c: Reads the NHANES demographic data, restricted to respondents with
  RIDAGEYR of 2 or more, keeping the identifier, age, sex and survey design variables. */
data DEMO;
  set NH.DEMO_J;
  where RIDAGEYR >= 2;
  keep SEQN RIDAGEYR RIAGENDR SDDSRVYR SDMVPSU SDMVSTRA;
run;


/*part d: Combines the FPED, Nutrient, and Demographic datasets to create the COHORT dataset. */

/* A match-merge with a BY statement needs every input ordered by the key SEQN. */
proc sort data=FPED;     by SEQN; run;
proc sort data=NUTRIENT; by SEQN; run;
proc sort data=DEMO;     by SEQN; run;

data COHORT;
  merge NUTRIENT (in=inNutrient)
        DEMO     (in=inDemo)
        FPED;
  by SEQN;
  /* Keep only respondents present in both the NUTRIENT and the DEMO subsets. */
  if not (inNutrient and inDemo) then delete;
run;


/*part e: Creates additional required variables: FWHOLEFRT, MONOPOLY, PFALLPROTLEG, PFSEAPLANTLEG, VTOTALLEG, and VDRKGRLEG.
  Each is a plain sum, so it is missing whenever any of its addends is missing. */
data COHORT;
  set COHORT;
  by SEQN;

  /* fruit: sum of the two FPED fruit variables below */
  FWHOLEFRT = DR1T_F_CITMLB + DR1T_F_OTHER;

  /* vegetables, each with legumes added */
  VTOTALLEG = DR1T_V_TOTAL + DR1T_V_LEGUMES;
  VDRKGRLEG = DR1T_V_DRKGR + DR1T_V_LEGUMES;

  /* protein foods, each with legumes added */
  PFALLPROTLEG  = DR1T_PF_MPS_TOTAL + DR1T_PF_EGGS + DR1T_PF_NUTSDS + DR1T_PF_SOY + DR1T_PF_LEGUMES;
  PFSEAPLANTLEG = DR1T_PF_SEAFD_HI + DR1T_PF_SEAFD_LOW + DR1T_PF_NUTSDS + DR1T_PF_SOY + DR1T_PF_LEGUMES;

  /* monounsaturated plus polyunsaturated fat totals */
  MONOPOLY = DR1TMFAT + DR1TPFAT;
run;

/*Section (II): Calculation of weighted means and a variance-covariance matrix and generation of a Monte Carlo 
dataset, enabling standard errors to be calculated.*/


/*i. Calculate the weighted means and the variance/covariance matrix for the dietary variables of interest. */

/* Reshape COHORT from wide to long: each input record produces 14 output records,
   one per dietary variable. On the k-th of those records VBL holds the value of the
   k-th variable, and the 14 original variables are overwritten with 0/1 indicators
   (1 for the k-th variable, 0 for the others). */
data ONE;
  set COHORT;
  array comp (14) DR1TKCAL VTOTALLEG VDRKGRLEG DR1T_F_TOTAL FWHOLEFRT DR1T_G_WHOLE DR1T_D_TOTAL
    PFALLPROTLEG PFSEAPLANTLEG MONOPOLY DR1TSFAT DR1TSODI DR1T_G_REFINED DR1T_ADD_SUGARS;
  array held (14) _temporary_;

  /* Save the real values first, because comp() is about to be overwritten. */
  do k = 1 to 14;
    held(k) = comp(k);
  end;

  do pos = 1 to 14;
    VBL = held(pos);
    /* A comparison evaluates to 1 when true and 0 when false. */
    do k = 1 to 14;
      comp(k) = (k = pos);
    end;
    output;
  end;

  drop k pos;
run;
/* Regress VBL on the 14 indicator variables with no intercept (NOINT) and request the
   covariance matrix of the parameter estimates (COVB). The strata, cluster and weight
   statements supply the complex survey design. The COVB table is captured through ODS
   as dataset CSD_COV. */
proc surveyreg data=ONE;
  title2 "Tricking SURVEYREG into giving us the covariance matrix of means";
  weight WTDRD1;
  strata SDMVSTRA;
  cluster SDMVPSU;
  model VBL = DR1TKCAL VTOTALLEG VDRKGRLEG DR1T_F_TOTAL FWHOLEFRT DR1T_G_WHOLE DR1T_D_TOTAL
              PFALLPROTLEG PFSEAPLANTLEG MONOPOLY DR1TSFAT DR1TSODI DR1T_G_REFINED DR1T_ADD_SUGARS
              / noint covb;
  ods output COVB=CSD_COV;
run;

/* Verification listing of CSD_COV. It is not required by later steps;
   comment out the next three lines to suppress it. */
proc print data=CSD_COV;
  title2 "Printout of csd_cov dataset -uses complex survey info";
run;
 
 
/* Weighted means of the 14 dietary variables, written to WTDM.
   The printed table is shown as written; add the NOPRINT option to the
   PROC MEANS statement to suppress it. */
proc means data=COHORT n min max mean;
  title2 'look at weighted means';
  weight WTDRD1;
  var DR1TKCAL VTOTALLEG VDRKGRLEG DR1T_F_TOTAL FWHOLEFRT DR1T_G_WHOLE DR1T_D_TOTAL
      PFALLPROTLEG PFSEAPLANTLEG MONOPOLY DR1TSFAT DR1TSODI DR1T_G_REFINED DR1T_ADD_SUGARS;
  output out=WTDM mean= ;
run;

/* Covariance rows: tag each row with _TYPE_ 'COV ' and move the parameter name into _NAME_. */
data COVDATA (drop=Parameter);
  set CSD_COV;
  _TYPE_ = 'COV ';
  _NAME_ = Parameter;
run;

/* Mean row: discard the automatic _TYPE_ and _FREQ_ variables from PROC MEANS,
   then add a character _TYPE_ of 'MEAN'. */
data WTDM;
  set WTDM (drop=_TYPE_ _FREQ_);
  _TYPE_ = 'MEAN';
run;

/* Stack the covariance rows and the mean row into one dataset. */
data COVDATA;
  set COVDATA WTDM;
run;
 
/* Verification listing of COVDATA. It is not required by later steps;
   comment out the next three lines to suppress it. */
proc print data=COVDATA;
  title2 'input to simnorml';
run;

/*ii.  A Monte Carlo data set with 10,000 rows is generated using the means
 and variance/covariance matrix from step i*/

/* COVDATA is read as a TYPE=COV dataset. The seed value may be changed,
   for example to a value drawn from a random number generator. */
proc simnormal data=COVDATA (type=cov)
               out=SIM_DATA
               numreal=10000
               seed=51230077
               outseed;
  var DR1TKCAL VTOTALLEG VDRKGRLEG DR1T_F_TOTAL FWHOLEFRT DR1T_G_WHOLE DR1T_D_TOTAL
      PFALLPROTLEG PFSEAPLANTLEG MONOPOLY DR1TSFAT DR1TSODI DR1T_G_REFINED DR1T_ADD_SUGARS;
run;
 
 
/* Verification only: summary statistics of the simulated variables.
   Not required by later steps; comment out the step to suppress it. */
proc means data=SIM_DATA n nmiss min max mean stddev;
  title2 "Distributions of Simulated Data";
  var DR1TKCAL VTOTALLEG VDRKGRLEG DR1T_F_TOTAL FWHOLEFRT DR1T_G_WHOLE DR1T_D_TOTAL
      PFALLPROTLEG PFSEAPLANTLEG MONOPOLY DR1TSFAT DR1TSODI DR1T_G_REFINED DR1T_ADD_SUGARS;
run;

/* Verification only: list the first 20 simulated records.
   Not required by later steps; comment out the step to suppress it. */
proc print data=SIM_DATA (obs=20);
  title2 "Listing of 20 Records from Simulated Data";
run;
 
 

/*Section (III): Application of the HEI-2020 scoring algorithm and calculation of HEI-2020 component and 
total scores and their standard errors and confidence intervals. */

/* i. This step uses the Monte Carlo dataset and calls the HEI-2020 scoring macro which calculates intake density 
  amounts and HEI scores. */
 
 
/* Call the scoring macro loaded by the %include above. Each keyword parameter is given
   the name of the matching variable in SIM_DATA; the result dataset is named AFTERMAC. */
%HEI2020 (indat         = SIM_DATA,
          kcal          = DR1TKCAL,
          vtotalleg     = VTOTALLEG,
          vdrkgrleg     = VDRKGRLEG,
          f_total       = DR1T_F_TOTAL,
          fwholefrt     = FWHOLEFRT,
          g_whole       = DR1T_G_WHOLE,
          d_total       = DR1T_D_TOTAL,
          pfallprotleg  = PFALLPROTLEG,
          pfseaplantleg = PFSEAPLANTLEG,
          monopoly      = MONOPOLY,
          satfat        = DR1TSFAT,
          sodium        = DR1TSODI,
          g_refined     = DR1T_G_REFINED,
          add_sugars    = DR1T_ADD_SUGARS,
          outdat        = AFTERMAC);


* this proc means is for verification and is not necessary; 
* uncomment the next five lines (remove the leading asterisks) if more information is desired; 
*proc means data=AFTERMAC n nmiss min max mean stddev; 
*  var VEGDEN GRBNDEN FRTDEN WHFRDEN WGRNDEN DAIRYDEN 
     PROTDEN SEAPLDEN FARATIO SODDEN RGDEN SFAT_PERC ADDSUG_PERC; 
*  title2 'after hei 2020 scoring macro'; 
*run; 
 

 
/*ii. Univariate and means procedures to compute one HEI-2020 total score and one set of HEI-2020 component scores and their
  standard errors and confidence intervals for the group, subgroup, or population.   */

/* 2.5th and 97.5th percentiles of each score across the records of AFTERMAC.
   Output names are the prefix followed by 2_5 or 97_5, for example h1_2_5 and totscore_97_5. */
proc univariate data=AFTERMAC noprint;
  var HEI2020_TOTALVEG HEI2020_GREEN_AND_BEAN HEI2020_TOTALFRUIT HEI2020_WHOLEFRUIT HEI2020_WHOLEGRAIN
      HEI2020_TOTALDAIRY HEI2020_TOTPROT HEI2020_SEAPLANT_PROT HEI2020_FATTYACID HEI2020_SODIUM
      HEI2020_REFINEDGRAIN HEI2020_SFAT HEI2020_ADDSUG HEI2020_TOTAL_SCORE;
  output out=CI
         pctlpts=2.5 97.5
         pctlpre=h1_ h2_ h3_ h4_ h5_ h6_ h7_ h8_ h9_ h10_ h11_ h12_ h13_ totscore_;
run;

/* Minimum, maximum, mean and standard deviation of each score across the same records.
   The output names follow the order of the VAR list. */
proc means data=AFTERMAC noprint;
  var HEI2020_TOTALVEG HEI2020_GREEN_AND_BEAN HEI2020_TOTALFRUIT HEI2020_WHOLEFRUIT HEI2020_WHOLEGRAIN
      HEI2020_TOTALDAIRY HEI2020_TOTPROT HEI2020_SEAPLANT_PROT HEI2020_FATTYACID HEI2020_SODIUM
      HEI2020_REFINEDGRAIN HEI2020_SFAT HEI2020_ADDSUG HEI2020_TOTAL_SCORE;
  output out=STAT
         min=h1_min h2_min h3_min h4_min h5_min h6_min h7_min
             h8_min h9_min h10_min h11_min h12_min h13_min totscore_min
         max=h1_max h2_max h3_max h4_max h5_max h6_max h7_max
             h8_max h9_max h10_max h11_max h12_max h13_max totscore_max
         mean=h1_mean h2_mean h3_mean h4_mean h5_mean h6_mean h7_mean
              h8_mean h9_mean h10_mean h11_mean h12_mean h13_mean totscore_mean
         stddev=h1_stddev h2_stddev h3_stddev h4_stddev h5_stddev h6_stddev h7_stddev
                h8_stddev h9_stddev h10_stddev h11_stddev h12_stddev h13_stddev totscore_stddev;
run;
 

/* CI and STAT each hold a single observation. Give both the same constant key
   so that they can be joined side by side with a BY merge. */
data CI;
  set CI;
  key = 1;
run;

data STAT;
  set STAT;
  key = 1;
run;

/* One record carrying the percentiles and the summary statistics together. */
data ALL;
  merge CI STAT;
  by key;
run;
 
/* Reshape the single wide record in ALL into 14 result rows, one per HEI-2020
   component plus one for the total score.
   Output variables, in this order: score (short code), slabel (descriptive label),
   min, max, mean, stderr (the standard deviation across the simulated records),
   lowerci and upperci (the 2.5th and 97.5th percentiles).

   The LENGTH statement is required. Without it the length of slabel is fixed by its
   first assignment (37 characters), and the longer labels for components 7 and 8
   are silently truncated in the printed and exported results. */
data RESULT (keep=score slabel min max mean stderr lowerci upperci);
  set ALL;

  length score $ 19 slabel $ 50;

  /* Row codes and labels, in output order. */
  array codes (14) $ 19 _temporary_
    ('HEI2020x1'  'HEI2020x2'  'HEI2020x3'  'HEI2020x4'  'HEI2020x5'
     'HEI2020x6'  'HEI2020x7'  'HEI2020x8'  'HEI2020x9'  'HEI2020x10'
     'HEI2020x11' 'HEI2020x12' 'HEI2020x13' 'TOTAL HEI 2020');
  array labels (14) $ 50 _temporary_
    ('HEI-2020 COMPONENT 1 TOTAL VEGETABLES'
     'HEI-2020 COMPONENT 2 GREENS AND BEANS'
     'HEI-2020 COMPONENT 3 TOTAL FRUIT'
     'HEI-2020 COMPONENT 4 WHOLE FRUIT'
     'HEI-2020 COMPONENT 5 WHOLE GRAINS'
     'HEI-2020 COMPONENT 6 DAIRY'
     'HEI-2020 COMPONENT 7 TOTAL PROTEIN FOODS'
     'HEI-2020 COMPONENT 8 SEAFOOD AND PLANT PROTEIN'
     'HEI-2020 COMPONENT 9 FATTY ACID RATIO'
     'HEI-2020 COMPONENT 10 SODIUM'
     'HEI-2020 COMPONENT 11 REFINED GRAINS'
     'HEI-2020 COMPONENT 12 SATURATED FAT'
     'HEI-2020 COMPONENT 13 ADDED SUGAR'
     'TOTAL HEI-2020 SCORE');

  /* Source variables from ALL, one array per statistic, in the same order as the labels. */
  array mins (14) h1_min h2_min h3_min h4_min h5_min h6_min h7_min
                  h8_min h9_min h10_min h11_min h12_min h13_min totscore_min;
  array maxs (14) h1_max h2_max h3_max h4_max h5_max h6_max h7_max
                  h8_max h9_max h10_max h11_max h12_max h13_max totscore_max;
  array avgs (14) h1_mean h2_mean h3_mean h4_mean h5_mean h6_mean h7_mean
                  h8_mean h9_mean h10_mean h11_mean h12_mean h13_mean totscore_mean;
  array sds  (14) h1_stddev h2_stddev h3_stddev h4_stddev h5_stddev h6_stddev h7_stddev
                  h8_stddev h9_stddev h10_stddev h11_stddev h12_stddev h13_stddev totscore_stddev;
  array lows (14) h1_2_5 h2_2_5 h3_2_5 h4_2_5 h5_2_5 h6_2_5 h7_2_5
                  h8_2_5 h9_2_5 h10_2_5 h11_2_5 h12_2_5 h13_2_5 totscore_2_5;
  array highs (14) h1_97_5 h2_97_5 h3_97_5 h4_97_5 h5_97_5 h6_97_5 h7_97_5
                   h8_97_5 h9_97_5 h10_97_5 h11_97_5 h12_97_5 h13_97_5 totscore_97_5;

  /* Emit one row per component; the assignment order fixes the output column order. */
  do comp_no = 1 to 14;
    score   = codes(comp_no);
    slabel  = labels(comp_no);
    min     = mins(comp_no);
    max     = maxs(comp_no);
    mean    = avgs(comp_no);
    stderr  = sds(comp_no);
    lowerci = lows(comp_no);
    upperci = highs(comp_no);
    output RESULT;
  end;
run;
 
/*Section (IV): Displays and saves the results in specified output folder.*/

/*i. The program saves total score and set of component scores for the population/group of interest, together with minimum and maximum values, standard errors 
   and confidence intervals.  An option is provided to export the results into a CSV file that can be opened in Excel. */
  
/* Write RESULT to the CSV file behind fileref RES, overwriting any existing file. */
proc export data=RESULT outfile=RES dbms=csv replace;
run;

/*ii. Print results - this step is included as a data check.  The min and max can be compared to the bounds of HEI-2020 scores - if any scores <0 or >100, this is a red flag.  */

proc print data=RESULT;
  title2 'complex survey design population method - mean and confidence interval of HEI-2020 using NH 17/18 data';
  id score;
  var slabel min max mean stderr lowerci upperci;
run;
 

 
 
 

